The purpose of this notebook is to assess isotopic separation of organic matter sources in a particular setting, and then test the ability of an AA-CSIA-based food web model to diagnose the relative importance of those sources to a food web or higher order consumer. The results here are diagnostic of the model’s efficacy in a specific setting, and should be used to inform the interpretation of the food web model’s output when applied to natural samples of consumers from that same environment.
This notebook is composed of the following sections:
The model code contained herein represents the base code for our stable isotope-based food web model as of 11/20/2024.
In the first few chunks, the user should enter all of the basic information that the model code will need to proceed with analysis.
The first chunk identifies tracers to be included in the model and how to treat them.
################################################################################
## IDENTIFYING SOURCE AND DESCRIPTIVE VARIABLES ##
# Indicate the name of the column describing the organic matter source to which each sample belongs.
# This column will be renamed to "Group"
Source.Variable <- "Group"
# Indicate the names representing each possible source of organic matter to the food web/consumer.
# The order of this vector will describe the order these groups are referenced in figures.
# The model can accomodate 2-6 organic matter sources, but will need to be modified to accomodate more.
Sources <- c("Surface","Large","Small")
# Indicate the names of all the consumer types also present in this column.
# This could be a generic identifier like "consumer" but should not be left empty here or in the spreadsheet
Consumers <- c("Mixed","Taxa")
# List the name of any additional variables that should be stored for analysis.
# The data in these columns must be complete for all samples.
Descriptive.Variables <- c("Location","Epoch","Event","Size","Depth","Type")
################################################################################
## SPECIFYING TRACERS TO BE USED IN MODEL ##
# Tracer names should match column names in the data spreadsheet
# Specify the names for all tracers with constant trophic discrimination and their uncertainty(i.e., TDF(protozoan) = TDF(metazoan))
Tracers.constTDF <- c("d15NAla", "d15NPro", "d15NSer", "d15NGly", "d15NLys", "d15NThr")
SDTracers.constTDF <- c("SDd15NAla", "SDd15NPro", "SDd15NSer", "SDd15NGly", "SDd15NLys", "SDd15NThr")
# Specify the names for all tracers with variable trophic discrimination and their uncertainty(i.e., TDF(protozoan) != TDF(metazoan))
Tracers.varTDF <- c("d15NGlx", "d15NAsx")
SDTracers.varTDF <- c("SDd15NGlx", "SDd15NAsx")
# Specify the names for all conservative tracers and their uncertainty (i.e., no trophic discrimination is expected)
Tracers.non <- c("d15NPhe")
SDTracers.non <- c("SDd15NPhe")
# Is there a subset of these tracers that should be mean normalized?
meannorm <- TRUE
# If TRUE, which should be mean normalized?
meannorm.which <- c()
SDmeannorm.which <- c()
# Of these tracers, which do you NOT want to be used to fit the model? Estimated values at the base of the food web will be calculated, but they will NOT affect mixing model solutions.
Tracers.exclude <- c("d15NSer", "d15NGly", "d15NLys")#,"d15NPro","d15NAsx")
SDTracers.exclude <- c("SDd15NSer", "SDd15NGly", "SDd15NLys")#,"SDd15NPro","SDd15NAsx")
## Additional Information ##
# Shea et al. (in prep) identified the following amino acids as having constant TDFs
# Tracers.constTDF <- c("d15NAla","d15NPro","d15NGly","d15NSer","d15NPhe","d15NLys","d15NThr")
# SDTracers.constTDF <- c("SDd15NAla","SDd15NPro","SDd15NGly","SDd15NSer","SDd15NPhe","SDd15NLys","SDd15NThr")
# Shea et al. (in prep) identified the following amino acids as having variable TDFs
# Tracers.varTDF <- c("d15NGlx","d15NAsx","d15NVal","d15NLeu","d15NIle")
# SDTracers.varTDF <- c("SDd15NGlx","SDd15NAsx","SDd15NVal","SDd15NLeu","SDd15NIle")
################################################################################
## Making some master lists of tracers - no changes necessary here ##
# All tracers
Tracers.all <- c(
Tracers.constTDF,
Tracers.varTDF,
Tracers.non
)
SDTracers.all <- c(
SDTracers.constTDF,
SDTracers.varTDF,
SDTracers.non
)
# Tracers that fractionate
Tracers.frac <- c(
Tracers.constTDF,
Tracers.varTDF
)
SDTracers.frac <- c(
SDTracers.constTDF,
SDTracers.varTDF
)
# Only the tracers to be used to fit the model
Tracers.mixing <- Tracers.all[-which(Tracers.all %in% Tracers.exclude)]
SDTracers.mixing <- SDTracers.all[-which(SDTracers.all %in% SDTracers.exclude)]
# Tracers that fractionate and will be used to fit the model
Tracers.mixfrac <- Tracers.mixing[which(
Tracers.mixing %in% Tracers.frac
)]
SDTracers.mixfrac <- SDTracers.mixing[which(
SDTracers.mixing %in% SDTracers.frac
)]
################################################################################
## SPECIFYING CHOICES FOR TDF(PROTOZOAN) AND TDF(METAZOAN) ##
# Note: default values are those used in Shea et al. (in prep)
## What TDFs and SDs should be used to describe amino acid δ15N fractionation in metazoans?
## Note that these TDFs should NOT be normalized to any source amino acid.
TDF_meta <- data.frame("d15NAla" = 6.3, "SDd15NAla" = 2.6,
"d15NGly" = 2.9, "SDd15NGly" = 3.1,
"d15NThr" =-4.9, "SDd15NThr" = 3.5,
"d15NSer" = 2.6, "SDd15NSer" = 3.2,
"d15NVal" = 4.4, "SDd15NVal" = 2.6,
"d15NLeu" = 5.6, "SDd15NLeu" = 2.4,
"d15NIle" = 5.5, "SDd15NIle" = 2.4,
"d15NPro" = 5.8, "SDd15NPro" = 1.7,
"d15NAsx" = 5.7, "SDd15NAsx" = 1.9,
"d15NMet" = 1.6, "SDd15NMet" = 2.6,
"d15NGlx" = 8.0, "SDd15NGlx" = 1.7,
"d15NPhe" = 0.3, "SDd15NPhe" = 0.5,
"d15NTyr" = NA , "SDd15NTyr" = NA,
"d15NLys" = 1.2, "SDd15NLys" = 1.2)
## What TDFs and SDs should be used to describe amino acid δ15N fractionation in protozoans?
# Note that these TDFs should NOT be normalized to any source amino acid.
TDF_proto = data.frame("d15NAla" = 6.3, "SDd15NAla" = 2.6,
"d15NGly" = 2.9, "SDd15NGly" = 3.1,
"d15NThr" =-2.1, "SDd15NThr" = 1.0,
"d15NSer" = 2.6, "SDd15NSer" = 3.2,
"d15NVal" = 0.7, "SDd15NVal" = 1.6,
"d15NLeu" = 1.4, "SDd15NLeu" = 0.6,
"d15NIle" =-0.5, "SDd15NIle" = 2.7,
"d15NPro" = 5.8, "SDd15NPro" = 1.7,
"d15NAsx" = 0.8, "SDd15NAsx" = 1.4,
"d15NMet" = NA , "SDd15NMet" = NA ,
"d15NGlx" = 0.5, "SDd15NGlx" = 1.0,
"d15NPhe" = 0.3, "SDd15NPhe" = 0.5,
"d15NTyr" = NA , "SDd15NTyr" = NA,
"d15NLys" = 1.2, "SDd15NLys" = 1.2)
# Define the Order you would like tracers referenced in, if any
Order <- c("d15NGlx", "d15NAsx", "d15NAla", "d15NIle", "d15NLeu", "d15NPro", "d15NVal",
"d15NGly", "d15NSer", "d15NPhe", "d15NLys", "d15NThr", "SAA",
"d13CGlx", "d13CAsx", "d13CAla", "d13CIle", "d13CLeu", "d13CPro", "d13CVal",
"d13CGly", "d13CSer", "d13CPhe", "d13CLys", "d13CThr", "EAA")
SDOrder <- c("SDd15NGlx", "SDd15NAsx", "SDd15NAla", "SDd15NIle", "SDd15NLeu", "SDd15NPro", "SDd15NVal",
"SDd15NGly", "SDd15NSer", "SDd15NPhe", "SDd15NLys", "SDd15NThr", "SDSAA",
"SDd13CGlx", "SDd13CAsx", "SDd13CAla", "SDd13CIle", "SDd13CLeu", "SDd13CPro", "SDd13CVal",
"SDd13CGly", "SDd13CSer", "SDd13CPhe", "SDd13CLys", "SDd13CThr", "SDEAA")
# If all tracers used aren't specified in the Order vector then it will be redefined
if(length(which(Order %in% Tracers.all))<length(Tracers.all)){
Order <- Tracers.all
SDOrder <- SDTracers.all
}
The next thing we will do is import the data. All of the data for organic matter sources and consumers should be contained in one Excel spreadsheet. Each column will be either a descriptive variable or tracer. The first row will include all variable/tracer names and each subsequent row will be a sample. The tracer data and descriptive data need to be complete for every sample included in this analysis. Any sample with an empty cell will be dropped.
## All data should be stored in one, single .xlsx file.
## Columns containing tracer values, uncertainties, and descriptive variables should match those defined in the chunk above.
################################################################################
## IMPORTING DATA ##
# Define the location of .xlsx file, and sheet name if multiple sheets are present.
Data.all <- read_excel("Data/AA-CSIA_OSP.xlsx",
sheet = "combined")[
c(Source.Variable,Descriptive.Variables,Tracers.all,SDTracers.all)
]
New names:
• `` -> `...7`
colnames(Data.all)[colnames(Data.all)==Source.Variable] <- "Group"
variables <- c("Group", Descriptive.Variables)
# Defining the preferred Order in which to reference organic matter sources
Data.all$Group <- factor(Data.all$Group, levels = c(Sources,Consumers))
# To define the order that specific descriptive variables should be referenced, adapt the below example.
# Data.all["Size"] <- factor(Data.all[["Size"]], levels = c(
# c("0.3-1 μm", "1-5 μm", "1-6 μm", "6-51 μm", ">51 μm", "Sediment Trap",
# "0.2-0.5 mm", "0.5-1.0 mm", "1-2 mm", "2-5 mm", ">5 mm")
# ))
Next we will mean normalize the data for a subset of tracers, based on the information given above. This is particularly useful if essential amino acid \(\d13C\) values are to be included in the model. Note that the mean value of mean normalized tracers can be added as an additional tracer if that argument is made “TRUE” below.
Data.all.orig <- Data.all
# If mean normalizing some of the data
if(meannorm == TRUE) {
# Calculating the mean value of those AAs in each sample
MN <- rowMeans(Data.all[meannorm.which])
SD <- c()
for (i in 1:nrow(Data.all)) {
SD <- append(SD, SDmean(Data.all.orig[i,SDmeannorm.which]))
}
# Subtracting that from the EAA δ13C values for each sample
# and replacing raw δ13C values in main data frame with mean normalized ones
Data.all[meannorm.which] <- sweep(Data.all[meannorm.which], 1, MN, "-")
# Can include the mean values as an additional tracer
include_meannorm_mean <- FALSE # make true to include mean values as separate tracer
if(include_meannorm_mean == TRUE){
Data.all$meannorm_mean <- MN
Data.all$SDmeannorm_mean <- SD
Tracers.all <- append(Tracers.all, "EAA")
SDTracers.all <- append(SDTracers.all, "SDEAA")
}
}
# we'll also need the mean values of tracers in organic matter sources later...
# first calculating the mean tracer value for each organic matter source group
src.mn <-
aggregate(Data.all[Tracers.all], # aggregate source data
by=list(Group = Data.all[["Group"]]), # by organic matter source group
FUN = mean, na.rm=TRUE)[-1] # taking a mean
# and the SD
# we will calculate the standard deviation within each population
src.SD <-
aggregate(Data.all[Tracers.all], # aggregate source data
by=list(Group = Data.all[["Group"]]), # by organic matter source group
FUN = sd, na.rm=TRUE)[-1] # calculating SD within the population
Now we’ll pull out the organic matter source data data as a separate data frame.
################################################################################
## ISOLATING ORGANIC MATTER SOURCE DATA ##
# Now we'll grab the data on organic matter sources from the master data frame.
Data.Sources <-
Data.all[
which(as.factor(Data.all$Group) %in% Sources),
]
# Further subsetting data....
Data.Sources <-
subset(Data.Sources,
Group == "Large" |
(Group == "Surface" & Size != ">51 μm" & Size != "6-51 μm") |
((Group == "Small" | Group == "Submicron") &
Depth >= 190)
)
# If a sample is missing data it will be removed in this step
Data.Sources <- na.omit(Data.Sources)
First lets visualize the value of each tracer in each organic matter source.
Sources.long <- melt(Data.Sources, id.vars=c(variables), measure.vars =c(Tracers.mixing),
variable.name = "Tracer", value.name = "Value")
# Defining the preferred Order in which to reference tracers.
Sources.long$Tracer <- factor(Sources.long$Tracer, levels = Order)
AA_plots <-
ggplot(Sources.long, aes(x = Tracer, y = Value, color = Group, group=Group))+
geom_point(alpha=0.4, position = position_dodge(width = 0.8))+
labs(color="Organic Matter Source", shape="Food Web Base", fill="Zooplankton Samples")+
theme(axis.text.x = element_text(angle = 90, vjust = 0.3, hjust=1))
AA_plots
Next, to visualize the between group patterns in the data we will do some multivariate analyses. We’ll start by carrying out a PCA. This should give us a decent of idea of what kind of if between-group separation is a major component of variation in this data set. It will also help us visualize which tracers are driving separation between certain organic matter sources and which are providing redundant information.
# Fitting PCA and adding Sample and Type as a supplemental qualitative variable
PCA = PCA(Data.Sources[c(variables,Tracers.mixing)], scale.unit = FALSE, quali.sup = variables, graph = FALSE)
# ## Uncomment these lines to see some diagnostics of how the PCA ran
# # Plot component variance
# fviz_eig(PCA, addlabels = FALSE, geom = "bar")
#
# # Print a summary of the PCA results
# summary(PCA)
# Plot PC1&2 results and project variable vectors and print
fviz_pca_biplot(PCA, axes = c(1,2), # axes tell which components to plot
geom = "point", addEllipses = TRUE, col.ind = Data.Sources[["Group"]], col.var = 'grey50', repel=TRUE, # some display settings
label = c("quali", "var")) # labelling
We’ll also try fitting an LDA to see if this is can work as a better tool to visualize our mixing space.
ntypes = nlevels(as.factor(Data.Sources[["Group"]]))
# fitting the model with leave one out cross validation
LDA.test = lda(Group ~ . ,data = Data.Sources[c("Group",Tracers.mixing)], CV = TRUE,
prior = rep(1/ntypes, ntypes))
Warning in lda.default(x, grouping, ...): groups Mixed Taxa are empty
# ## uncomment these lines to see the results of leave one out cross-validation
# ## and see some LDA model diagnostics
# print model result
# LDA.train
# # create a table which compares the classification of the LDA model to the actual producer type
# ct.prod.norm <- table(Data.Sources[["Group"]],
# LDA.test$class)
# # total percent of samples correctly classified is the sum of the diagonal of this table
# noquote(c('% successfully categorized: ', sum(diag(prop.table(ct.prod.norm))))) #85% effective
# Refitting the model using all of the available training data
LDA.full = lda(Group ~ . ,data = Data.Sources[c("Group",Tracers.mixing)], CV = FALSE, prior = rep(1/ntypes, ntypes))
Warning in lda.default(x, grouping, ...): groups Mixed Taxa are empty
# ## uncomment this line to see a summary of the full LDA
# LDA.full
# store locations of training data in LD space for later plotting
pred.train = predict(LDA.full, Data.Sources[Tracers.mixing])
class.train = data.frame('Type' = Data.Sources[["Group"]], pred.train$x)
# Generate biplot with training data only
plot.mix.LD12 = ggplot(data = class.train,
aes(x = LD2, y = LD1, color = Type), size = 2) +
geom_point(alpha = 0.3, size=3) +
stat_ellipse(type = 't', alpha = 0.6)+
theme(legend.position = 'none')
plot.mix.LD13 = ggplot(data = class.train,
aes(x = LD3, y = LD1, color = Type), size = 2) +
geom_point(alpha = 0.3, size=3) +
stat_ellipse(type = 't', alpha = 0.6)+
labs(color = "Training Data")
grid.arrange(ncol = 2, widths = c(1,1.2), plot.mix.LD12, plot.mix.LD13, top = 'LDA of Organic Matter Sources') + theme(legend.position = 'top')
NULL
# SET "eval = TRUE" TO INCLUDE
# We'll also plot LD1, 2, and 3 on 3D axes
plot_ly(data=class.train, x=~LD1, y=~LD2, z=~LD3,
type="scatter3d", mode="markers", color=class.train$Type,
marker = list(line = list(color = "1", width = 0.5)))
Next we will simulate some zooplankton data as a way to test the efficacy of the model under different ecological scenarios. We will simulate it using known ecological parameters and the same TDFs that will be supplied to the food web model as a prior. If the organic matter source data provides adequate separation of mixing end member and TDF are well enough constrained relative to the length of the food web expect in wild samples, then the model should return parameters wimilar to those defined in the following chunk.
Note that there are two methods for generating mixing parameters describing the relative contributins of organic matter sources to the base of the food web:
Sim_Zoop(
zoops.n = 10,
PTS = 1,
MTS = 1,
PTS.sd = 0.5,
MTS.sd = 0.5,
TDF_m = TDF_meta[Tracers.all], # trophic discrimination factors
TDF_m.sd = TDF_meta[SDTracers.all], # SD of trophic discrimination factors
TDF_p = TDF_proto[Tracers.all], # trophic discrimination factors
TDF_p.sd = TDF_proto[SDTracers.all], # SD of trophic discrimination factors
Sources = Sources,
Data.Source = Data.Sources,
Tracers.all = Tracers.all,
variables = variables,
Use_Dirichlet = TRUE,
zoops.f = zoops.f
)
Figure 1: Ternary diagram showing the fractional contribution of our three sources (small and large particle and actively transported material) to the base of our simulated zooplankton food web.
Let’s plot out the tracer values of organic matter sources and zooplankton.
Zoops.long <- melt(Data.Zoops, id.vars=c(variables), measure.vars =c(Tracers.mixing),
variable.name = "Tracer", value.name = "Value")
Base.long <- melt(base.sim, id.vars=c(variables), measure.vars =c(Tracers.mixing),
variable.name = "Tracer", value.name = "Value")
AA_plots <-
ggplot(Sources.long)+
geom_point(aes(x = Tracer, y = Value, color = Group), alpha=0.4,
position = position_nudge(x = 0.1))+
geom_point(data = Zoops.long,
aes(x = Tracer, y = Value, shape = ""
),color="black",
position = position_nudge(x = -0.1))+
geom_point(data = Base.long,
aes(x = Tracer, y = Value, fill = ""
),color="brown",
position = position_nudge(x = 0))+
ylab("Value")+
labs(color="Organic Matter Source", shape="Simulated Zooplankton Samples", fill="Simulated Food Web Base")+
theme(axis.text.x = element_text(angle = 90, vjust = 0.3, hjust=1))
AA_plots
In Order to better visualize the data we’ve generated in multivariate space we’ll fit an LDA to the source data and then plot the zooplankton in that LD space.
# poject zooplankton into LD space and predict classification
pred.zoops = predict(LDA.full, Data.Zoops)
#class.zoops = data.frame('Class' = pred.zoops$class, pred.zoops$x)
# generate data frame which stores predicted classification of particles indexed by size and depth
class.zoops = data.frame('Predicted' = pred.zoops$class, pred.zoops$x, lapply(Data.Zoops[,variables], as.character))
# Predicted probabilities of class membership
head(pred.zoops$posterior,n=dim(pred.zoops$posterior))
Surface Large Small
1 1.285854e-36 1.110795e-01 8.889205e-01
2 8.391540e-28 9.998527e-01 1.473001e-04
3 1.381015e-19 9.999999e-01 1.044465e-07
4 1.402704e-35 1.485743e-01 8.514257e-01
5 2.079425e-46 4.032309e-05 9.999597e-01
6 2.166672e-24 9.999765e-01 2.349474e-05
7 5.590833e-34 6.558944e-01 3.441056e-01
8 1.706362e-50 2.216999e-05 9.999778e-01
9 6.918328e-39 3.356757e-01 6.643243e-01
10 1.144983e-58 6.473497e-09 1.000000e+00
# linear discriminants
head(pred.zoops$x,n=dim(pred.zoops$x))
LD1 LD2 LD3
1 11.805288 -4.901042 -2.524617
2 9.250330 -5.881015 -2.601967
3 6.747982 -5.680171 -2.286215
4 11.548289 -4.744798 -2.429760
5 13.838557 -4.337532 -2.534342
6 8.256582 -5.441997 -2.381819
7 11.193361 -5.175273 -2.551514
8 14.890150 -5.249578 -2.955605
9 12.485805 -6.093154 -3.011917
10 16.469005 -4.173730 -2.728300
# poject food web base into LD space and predict classification
pred.base = predict(LDA.full, base.sim)
# generate data frame which stores predicted classification of particles indexed by size and depth
class.base = data.frame('Predicted' = pred.base$class, pred.base$x, lapply(Data.Zoops[,variables], as.character))
# # Predicted probabilities of class membership
# head(pred.base$posterior,n=dim(pred.base$posterior))
# # linear discriminants
# head(pred.base$x,n=dim(pred.base$x))
# Generate biplot with Zooplankton overlayed onto training data
plot.mix.LD12 = ggplot(data = class.train,
aes(x = LD2, y = LD1, color = Type), size = 2) +
geom_point(alpha = 0.3) +
stat_ellipse(type = 't', alpha = 0.6)+
geom_point(data = class.zoops,
aes(x = LD2, y = LD1, shape = "Zooplankton"),color = 'black')+
geom_point(data = class.base,
aes(x = LD2, y = LD1, shape = "Food Web Base"),color = 'red')+
# geom_text(data = class.zoops,
# aes(label=Depth),hjust=-0.2, vjust=1.1)+
# coord_cartesian(xlim=c(-5,5), ylim=c(-7.5,5)) +
theme(legend.position = 'none')
plot.mix.LD13 = ggplot(data = class.train,
aes(x = LD3, y = LD1, color = Type), size = 2) +
geom_point(alpha = 0.3) +
stat_ellipse(type = 't', alpha = 0.6)+
geom_point(data = class.zoops,
aes(x = LD3, y = LD1, shape = "Zooplankton"),color = 'black')+
geom_point(data = class.base,
aes(x = LD3, y = LD1, shape = "Food Web Base"),color = 'red')+
# geom_point(data = class.zoops,
# aes(x = LD3, y = LD1, shape = Predicted), color = 'black')+
# geom_text(data = class.zoops,
# aes(label=Depth),hjust=-0.2, vjust=1.1)+
# coord_cartesian(xlim=c(-5,5), ylim=c(-7.5,5)) +
labs(shape = "Simulated Data") + labs(color = "Training Data")
grid.arrange(ncol = 2, widths = c(1,1.6), plot.mix.LD12, plot.mix.LD13, top = 'LDA of Organic Matter Sources') + theme(legend.position = 'top')
NULL
Next, we will use Markov Chain Monte Carlo to find PDFs describing the most likely solutions to our mixing problem. First we will write a BUGS model, then use JAGS to do the MCMC.
Organizing the input data for MCMC.
N_T <- length(Tracers.all) # number of tracers
N_S <- length(Sources) # number of organic matter sources
N_Z <- nrow(Data.Zoops) # number of zooplankton samples
sd.anl <- 0.52
sdTDF_scaling_factor <- 1
sdY_scaling_factor <- 1
Data.1 <- list(
## How many tracers will be used in this model?
N_T = N_T,
## How many zooplankton samples are there?
N_Z = N_Z,
## Where should the model look for zooplankton amino acid isotope data?
Y = as.matrix(Data.Zoops[Tracers.all]),
## and the analytical uncertainty in those measurements
sd_Y = as.matrix(Data.Zoops[SDTracers.all])*sdY_scaling_factor,
## Which tracers do not fractionate?
i_T_non = which(Tracers.all %in% Tracers.non),
## Which tracers do fractionate?
i_T_frac = which(Tracers.all %in% Tracers.frac),
## Which tracers have constant TDFs throughout the food web?
i_T_const = which(Tracers.all %in% Tracers.constTDF),
## Which tracers have variable TDFs throughout the food web?
i_T_var = which(Tracers.all %in% Tracers.varTDF),
## Which tracers will be used in the mixing model and see the data?
i_T_mix = which(Tracers.all %in% Tracers.mixing),
## Where should the model look for TDF data and its associated uncertainty?
# for metazoan TDFs
TDF_m = as.numeric(TDF_meta[Tracers.frac]),
sdTDF_m = as.numeric(TDF_meta[SDTracers.frac])*sdTDF_scaling_factor,
# for protozoan TDFs
TDF_p = as.numeric(TDF_proto[Tracers.frac]),
sdTDF_p = as.numeric(TDF_proto[SDTracers.frac])*sdTDF_scaling_factor
)
## Where should the model look for organic matter source data?
## making generalized names A-F for a max of six possible organic matter sources
Sources.alpha <- c("X_A","X_B","X_C","X_D","X_E","X_F")
SDSources.alpha <- c("sdX_A","sdX_B","sdX_C","sdX_D","sdX_E","sdX_F")
nsams_alpha <- c("N_A","N_B","N_C","N_D","N_E","N_F")
## Loop through all sources described in setup chunks above to add that data
## to the Data.1 list as individual matrices for each source
for (i in 1:N_S) {
sams_source <- Data.Sources[Data.Sources[["Group"]]==Sources[i],c(Tracers.all,SDTracers.all)]
Data.1[[Sources.alpha[i]]] = as.matrix(sams_source[Tracers.all])
Data.1[[SDSources.alpha[i]]] = as.double(colMeans(sams_source[SDTracers.all]))
Data.1[[nsams_alpha[i]]] = sum((Data.Sources[["Group"]]==Sources[i])*1)
}
# Setting i_T_ vectors =0 if no tracers in that ccategory were included
if(length(Data.1$i_T_non)==0){
Data.1$i_T_non <- 0
}
if(length(Data.1$i_T_frac)==0){
Data.1$i_T_frac <- 0
}
if(length(Data.1$i_T_const)==0){
Data.1$i_T_const <- 0
}
if(length(Data.1$i_T_var)==0){
Data.1$i_T_var <- 0
}
and defining our initial values.
set.seed(666) # Set seed for repeatability
Nchains <- 3 # Number of parallel Markov Chains to run
inits_1 <- vector(mode="list", length=Nchains) # initial values
means_alpha <- c("mean_A","mean_B","mean_C","mean_D","mean_E","mean_F") # names for organic matter sources
sd_alpha <- c("sd_A","sd_B","sd_C","sd_D","sd_E","sd_F") # and their uncertainty
# This function will generate starting values for each chain and save them as a list
fun_init_1 <- function(i) {
inits <-
list(
# We will basically start TDFs nea their expected value
# TDF_meta = runif(length(TDF_meta[Tracers.frac]),
# as.double(TDF_meta[Tracers.frac]-TDF_meta[SDTracers.frac]),
# as.double(TDF_meta[Tracers.frac]+TDF_meta[SDTracers.frac])),
# TDF_proto = runif(length(TDF_proto[Tracers.frac]),
# as.double(TDF_proto[Tracers.frac]-TDF_proto[SDTracers.frac]),
# as.double(TDF_proto[Tracers.frac]+TDF_proto[SDTracers.frac])),
pz = rdirichlet(n=N_Z, rep(1, N_S)), # mixing coefficients can take any starting composition
PTS = runif(n=N_Z, 0, 5), # PTS can start from 0 to 5
sdPTS = rtgamma(n=N_Z, 0.001, 0.001, 10^-50), # SDPTS gets an inverse gamma
MTS = runif(n=N_Z, 0, 5), # MTS can start from 0 to 5
sdMTS = rtgamma(n=N_Z, 0.001, 0.001, 10^-50), # SDMTS get and inverse gamma
.RNG.seed = i+1,
.RNG.name = c("base::Super-Duper",
"base::Wichmann-Hill",
"base::Marsaglia-Multicarry")[i %% 3 + 1]
)
# We define starting values for sources in a separate loop so we can be flexible with the number of sources included in the model
for (j in 1:N_S) {
inits[[means_alpha[j]]] = runif(N_T, -40, 20)
inits[[sd_alpha[j]]] = runif(N_T, 0, 2)
}
return(inits)
}
for (i in 1:Nchains) inits_1[[i]] <- fun_init_1(i)
Now we’ll define our BUGS model.
if(Data.1$i_T_non[1]==0){Tracers_NoFrac <- FALSE}else{Tracers_NoFrac <- TRUE}
if(Data.1$i_T_const[1]==0){Tracers_ConstFrac <- FALSE}else{Tracers_ConstFrac <- TRUE}
if(Data.1$i_T_var[1]==0){Tracers_VarFrac <- FALSE}else{Tracers_VarFrac <- TRUE}
OMSM <- OMSM_Gen_Model(
N_S = N_S,
Tracers_NoFrac = TRUE,
Tracers_ConstFrac = TRUE,
Tracers_VarFrac = TRUE
)
Now we can run the MCMC!
samsPerChain <- 10000 # needed below for narrative, so we give it a name.
monitor_vars = c("mean_A", "sd_A",
"mean_B", "sd_B",
"mean_C", "sd_C",
# "mean_D", "sd_D",
"mean_b", "mean_z",
"TDF_meta", "sdTDF_m",
"TDF_proto","sdTDF_p",
"pz" , "FWL" , "PTS", "MTS")
rjo_1 <- # S3 object of class "runjags"
run.jags(model = OMSM,
data = Data.1,
inits = inits_1,
silent.jags = FALSE,
n.chains = Nchains,
adapt = 5000,
burnin = 40000,
thin = 6,
sample = samsPerChain,
method = "parallel",
modules = "glm",
monitor = monitor_vars
)
Calling 3 simulations using the parallel method...
Following the progress of chain 1 (the program will wait for all chains
to finish before continuing):
Welcome to JAGS 4.3.1 on Fri Feb 7 13:38:39 2025
JAGS is free software and comes with ABSOLUTELY NO WARRANTY
Loading module: basemod: ok
Loading module: bugs: ok
. Loading module: glm: ok
. . Reading data file data.txt
. Compiling model graph
Resolving undeclared variables
Allocating nodes
Graph information:
Observed stochastic nodes: 267
Unobserved stochastic nodes: 104
Total graph size: 1776
WARNING: Unused variable(s) in data table:
sdX_A
sdX_B
sdX_C
. Reading parameter file inits1.txt
. Initializing model
. Adapting 5000
-------------------------------------------------| 5000
++++++++++++++++++++++++++++++++++++++++++++++++++ 100%
Adaptation successful
. Updating 40000
-------------------------------------------------| 40000
************************************************** 100%
. . . . . . . . . . . . . . . . . Updating 60000
-------------------------------------------------| 60000
************************************************** 100%
. . . . Updating 0
. Deleting model
.
All chains have finished
Simulation complete. Reading coda files...
Coda files loaded successfully
Note: Summary statistics were not produced as there are >50 monitored
variables
[To override this behaviour see ?add.summary and ?runjags.options]
FALSEFinished running the simulation
Here we run some diagnostics. Initially we use John K. Kruschke’s function diagMCMC to check out model parameters and make sure things are running properly. When these diagnostics look good, we set eval=FALSE in the chunk header and made a nicer parameter plot comparing known parameter values and the MCMC posteriors for those parameters, which is seen below.
## Kruschke's utility functions (edited by NF)
# edit "parname" to print additional parameter diagnostics as desired
# numbers in hard brackets pertain to samples and tracers
if (c(TRUE,FALSE)[1]) source("Utilities/DBDA2E-utilities.R")
diagMCMC(rjo_1$mcmc, parName="FWL[1]")
Error in data[1:nobs, , drop = FALSE] : incorrect number of dimensions
[1] "Warning: coda::gelman.plot fails for FWL[1]"
Figure 2: Means of tracers 1 and 2 for prey types A,B,C.
diagMCMC(rjo_1$mcmc, parName="MTS[1]")
Error in data[1:nobs, , drop = FALSE] : incorrect number of dimensions
[1] "Warning: coda::gelman.plot fails for MTS[1]"
Figure 3: Means of tracers 1 and 2 for prey types A,B,C.
diagMCMC(rjo_1$mcmc, parName="PTS[1]")
Error in data[1:nobs, , drop = FALSE] : incorrect number of dimensions
[1] "Warning: coda::gelman.plot fails for PTS[1]"
Figure 4: Means of tracers 1 and 2 for prey types A,B,C.
diagMCMC(rjo_1$mcmc, parName="pz[1,1]")
Error in data[1:nobs, , drop = FALSE] : incorrect number of dimensions
[1] "Warning: coda::gelman.plot fails for pz[1,1]"
Figure 5: Means of tracers 1 and 2 for prey types A,B,C.
diagMCMC(rjo_1$mcmc, parName="pz[1,2]")
Error in data[1:nobs, , drop = FALSE] : incorrect number of dimensions
[1] "Warning: coda::gelman.plot fails for pz[1,2]"
Figure 6: Means of tracers 1 and 2 for prey types A,B,C.
diagMCMC(rjo_1$mcmc, parName="pz[1,3]")
Error in data[1:nobs, , drop = FALSE] : incorrect number of dimensions
[1] "Warning: coda::gelman.plot fails for pz[1,3]"
Figure 7: Means of tracers 1 and 2 for prey types A,B,C.
# diagMCMC(rjo_1$mcmc, parName="pz[1,4]")
diagMCMC(rjo_1$mcmc, parName="mean_z[1,9]")
Error in data[1:nobs, , drop = FALSE] : incorrect number of dimensions
[1] "Warning: coda::gelman.plot fails for mean_z[1,9]"
Figure 8: Means of tracers 1 and 2 for prey types A,B,C.
Now let’s generate some data structures to store the MCMC output and other relevant data from generating plots and figures. There are a few different data types we need to get out of the MCMC output
We’ll look at these one at a time.
OMSM_Global_Extract(
rjo_1 = rjo_1,
Sources = Sources,
Data.Zoops = Data.Zoops,
Tracers.all = Tracers.all,
Tracers.frac = Tracers.frac,
Tracers.varTDF = Tracers.varTDF
)
No id variables; using all as measure variables
No id variables; using all as measure variables
No id variables; using all as measure variables
No id variables; using all as measure variables
No id variables; using all as measure variables
No id variables; using all as measure variables
No id variables; using all as measure variables
No id variables; using all as measure variables
No id variables; using all as measure variables
No id variables; using all as measure variables
No id variables; using all as measure variables
No id variables; using all as measure variables
No id variables; using all as measure variables
No id variables; using all as measure variables
Let’s generate plots of the MCMC posteriors for global model parameters. Mainly here we’re interested in making sure that the mean \(\mathrm{\delta^{15}\text{N}_\text{AA}}\) and \(\mathrm{\delta^{13}\text{C}_\text{AA}}\) values that the model used in mixing models agree well with the data, and looking to see what the model selected for \(\mathrm{\Delta^{15}\text{N}_\text{AA}}\) values.
# We're going to import the literature D15N data to add to our global posterior plots for comparison
D15N_lit <- read_excel("~/University_of_Hawaii/SIA_work/EXPORTS/Data/Literature/McMahon_McCarthy_ES15-00698_Supplement1.xlsx",
sheet = "Proto-vs-Metazoans", n_max = 21)
D15N_lit_long <- melt(D15N_lit, id.vars=c("ProMet"), measure.vars = Tracers.frac,
variable.name="Tracer", value.name = "Value")
theme_set(theme_classic2()+
theme(panel.grid.major.x = element_line(colour = "grey95"),
panel.grid.major.y = element_line(colour = "grey95")))
## Plotting posterior Δ15N values compared to true values
labels1 <- c('d15NGlx'="Glx",'d15NAsx'="Asx",'d15NAla'="Ala",'d15NIle'="Ile",'d15NLeu'="Leu",'d15NPro'="Pro",'d15NVal'="Val",'d15NGly'="Gly",'d15NSer'="Ser",'d15NPhe'="Phe",'d15NLys'="Lys",'d15NThr'="Thr")
labels2 <- c('d15NGlx'="Glx",'d15NAsx'="Asx",'d15NIle'="Ile",'d15NLeu'="Leu",'d15NVal'="Val")
# TDF_meta.long <- melt(TDF_meta[Tracers.mixfrac], variable.name = "Tracer", value.name = "Value")
#
# plot_TDF_meta <-
# ggplot()+
# geom_density(data = posts.global.long$TDF_meta$samples[
# which(posts.global.long$TDF_meta$samples$Tracer %in% Tracers.mixfrac),],
# aes(x=Value), fill="grey90", color="grey10", size=0.5)+
# geom_line(data = posts.global.long$TDF_meta$HDI95[
# which(posts.global.long$TDF_meta$HDI95$Tracer %in% Tracers.mixfrac),],
# aes(x=Value, y=rep(c(0,0),length(Tracers.mixfrac)), color=""),
# size=2,lineend="round")+
# geom_point(data=TDF_meta.long[
# which(TDF_meta.long$Tracer %in% Tracers.mixfrac),],
# aes(x=Value, y=0, fill=""),
# shape=24,color="grey10",size=5, stroke=0.75)+
# geom_point(data = subset(D15N_lit_long[
# which(D15N_lit_long$Tracer %in% Tracers.mixfrac),], ProMet == "Metazoan"),
# aes(x=Value, y=0, shape = ""))+
# scale_shape_manual(values = c(17,17))+
# ggtitle(expression(Delta^{15}*N[meta]))+
# xlab(expression(delta^{15}*N~("\u2030")))+
# facet_wrap(~Tracer, scales = "free", nrow = 2, labeller = as_labeller(labels1))+
# scale_x_continuous(n.breaks=4)+
# scale_color_manual(values = c("red3","steelblue"))+
# scale_fill_manual(values = c("steelblue","red3"))+
# labs(fill="True Value",color="95% HDI",shape="Literature Studies")+
# theme(axis.text.y = element_blank(), axis.ticks.y = element_blank(),
# axis.title.y = element_blank(),plot.title = element_text(hjust = 0.5))
#
# TDF_proto.long <- melt(TDF_proto[Tracers.varTDF], variable.name = "Tracer", value.name = "Value")
# plot_TDF_proto <-
# ggplot()+
# geom_density(data = posts.global.long$TDF_proto$samples[
# which(posts.global.long$TDF_proto$samples$Tracer %in% Tracers.mixfrac),],
# aes(x=Value), fill="grey90", color="grey10", size=0.5)+
# geom_line(data = posts.global.long$TDF_proto$HDI95,
# aes(x=Value, y=rep(c(0,0),nTracers.varTDF), color=""),
# size=2,lineend="round")+
# geom_point(data=TDF_proto.long,
# aes(x=Value, y=0, fill=""),
# shape=24,color="grey10",size=5, stroke=0.75)+
# geom_point(data = subset(D15N_lit_long[
# which(D15N_lit_long$Tracer %in% Tracers.varTDF),], ProMet == "Protozoan"),
# aes(x=Value, y=0, shape = ""))+
# scale_shape_manual(values = c(17,17))+
# ggtitle(expression(Delta^{15}*N[proto]))+
# xlab(expression(delta^{15}*N~("\u2030")))+
# facet_wrap(~Tracer, scales = "free", nrow = 2, labeller = as_labeller(labels2))+
# scale_x_continuous(n.breaks=4)+
# scale_color_manual(values = c("red3","steelblue"))+
# scale_fill_manual(values = c("steelblue","red3"))+
# labs(fill="True Value",color="95% HDI",shape="Literature Studies")+
# theme(axis.text.y = element_blank(), axis.ticks.y = element_blank(),
# axis.title.y = element_blank(),plot.title = element_text(hjust = 0.5),
# legend.position = "none")
#
# ggarrange(nrow=1, ncol=3, widths = c(16,1,4), common.legend = TRUE, legend="bottom",
# plot_TDF_meta, ggplot()+geom_blank()+theme_void(), plot_TDF_proto)
## plotting posterior δ15N values for organic matter sources compared to data
plot_source <-
ggplot()+
geom_density(data = posts.global.long$mean_Sources$samples[
which(posts.global.long$mean_Sources$samples$Tracer %in% Tracers.mixing),],
aes(x=Value, fill=Group), alpha=0.8, color="grey10", size=0.5)+
geom_point(data=Sources.long[
which(Sources.long$Tracer %in% Tracers.mixing),],
aes(x=Value, y=0, fill=Group, shape = "Source Data"),
color="grey10",size=2,stroke=0.75,
show.legend = FALSE)+
scale_shape_manual(values=c(24))+
xlab("Value")+
facet_wrap(~Tracer, scales = "free", nrow=2)+
scale_x_continuous(n.breaks=4)+
labs(fill="Organic Matter Source",color="95% HDI")+
theme(axis.text.y = element_blank(), axis.ticks.y = element_blank(),
axis.title.y = element_blank(),plot.title = element_text(hjust = 0.5),
legend.position="bottom")
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
generated.
plot_source
OMSM_Zoop_Extract(
rjo_1 = rjo_1,
Sources = Sources,
Data.Zoops = Data.Zoops,
Tracers.all = Tracers.all,
Tracers.frac = Tracers.frac,
Tracers.varTDF = Tracers.varTDF
)
Now let’s generate plots of the MCMC posteriors for the zooplankton sample-specific parameters. We’ll want to plot the model posteriors for the tracer values at the base of the food web. We’ll do this for all the individual tracers, but also try and visualize the multivariate patterns using LDA.Note that all tracers are plotted here, including those that were not used to fit the model, though those are not included in LDAs.
## Generating AA δ15N comparison plot
posts.zoops.long$base$thin$Group <- as.factor(posts.zoops.long$base$thin$Group)
AAplots <-
ggplot(Sources.long)+
geom_point(aes(x = Tracer, y = Value, color = Group),
position = position_nudge(x = 0.2))+
geom_point(data = Zoops.long,
aes(x = Tracer, y = Value, fill="simulated zooplankton"), pch = "triangle",
position = position_nudge(x = -0.2))+
geom_point(data = posts.zoops.long$base$thin[which(posts.zoops.long$base$thin$Tracer %in% Tracers.mixing),],
aes(x = Tracer, y = Value), color = "grey60", alpha=0.1, pch=16,
position = position_dodge2(width=0.2))+
geom_point(data = posts.zoops.long$base$mean[which(posts.zoops.long$base$mean$Tracer %in% Tracers.mixing),],
aes(x = Tracer, y = Value, shape="posterior mean"), color = "brown", alpha=1,
position = position_dodge2(width = 0.2))+
# position = position_nudge(x = 0.25))+
geom_point(data = Base.long,
aes(x = Tracer, y = Value, shape="true value"), color = "red", alpha=1,
position = position_dodge2(width = 0.2))+
labs(color="Organic Matter Source", shape="Food Web Base", fill="Zooplankton Samples")+
theme(axis.text.x = element_text(angle = 90, vjust = 0.3, hjust=1))
AAplots
## Generating LDA comparison plot
# poject zooplankton into LD space and predict classification
pred.base.MCMC = predict(LDA.full,
posts.zoops$base$mode)
#class.zoops = data.frame('Class' = pred.zoops$class, pred.zoops$x)
# generate data frame which stores predicted classification of particles indexed by size and depth
class.base.MCMC = data.frame('Predicted' = pred.base.MCMC$class, pred.base.MCMC$x,
lapply(posts.zoops$base$mode[variables],as.character))
# # Predicted probabilities of class membership
# head(pred.base.MCMC$posterior,n=dim(pred.base.MCMC$posterior))
# # linear discriminants
# head(pred.base.MCMC$x,n=dim(pred.base.MCMC$x))
pred.base.sams <- predict(LDA.full,
posts.zoops$base$thin[Tracers.mixing])
# generate data frame which stores predicted classification of particles indexed by size and depth
class.base.sams = data.frame('Predicted' = pred.base.sams$class, pred.base.sams$x,
lapply(posts.zoops$base$thin[variables],as.character))
# Predicted probabilities of class membership
# head(pred.base.sams$posterior,n=dim(pred.base.sams$posterior))
# linear discriminants
# head(pred.base.sams$x,n=dim(pred.base.sams$x))
ggplot(data = class.train,
aes(x = LD2, y = LD1), size = 2) +
geom_point(aes(shape=Type), alpha = 1, color="grey20") +
stat_ellipse(aes(group=Type),type = 't', alpha = 0.8)+
stat_ellipse(data = class.base.sams,
aes(x = LD2, y = LD1, color=Group, fill=Group),
type = 't', alpha = 0.5, size=0.5, geom = "polygon")+
# geom_point(data = class.base.sams,
# aes(x = LD2, y = LD1, fill=Group), shape=23, alpha=0.1)+
geom_point(data = class.base,
aes(x = LD2, y = LD1, fill=Group, alpha = ""),
shape=21, color="black", size=3, stroke=1)+
scale_alpha_manual(values = c(1,1,1,1))+
geom_point(data = class.base.MCMC,
aes(x = LD2, y = LD1, fill=Group),
size=3.5, shape=23, stroke=1)+
labs(fill = "Posterior 95% HDI and Mode",
color = "Zooplankton Sample",
alpha = "True Value",
shape = "Organic Matter Source"
)+
guides(color=FALSE)
Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
of ggplot2 3.3.4.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
generated.
We also want to look at the mean_z parameter for each zooplankton sample to see if and where the posteriors are deviating from the data.
## Generating AA δ15N comparison plot
posts.zoops.long$zoop$thin$Group <- as.factor(posts.zoops.long$zoop$thin$Group)
AAplots <-
ggplot(Sources.long)+
geom_point(aes(x = Tracer, y = Value, color = Group),
position = position_nudge(x = 0.2))+
geom_point(data = Zoops.long,
aes(x = Tracer, y = Value, fill="simulated zooplankton"), pch = "triangle",
position = position_nudge(x = -0.2))+
geom_point(data = posts.zoops.long$zoop$thin[which(posts.zoops.long$zoop$thin$Tracer %in% Tracers.mixing),],
aes(x = Tracer, y = Value), color = "grey60", alpha=0.1, pch=16,
position = position_dodge2(width=0.1))+
geom_point(data = posts.zoops.long$zoop$mean[which(posts.zoops.long$zoop$mean$Tracer %in% Tracers.mixing),],
aes(x = Tracer, y = Value, shape="posterior mean"), color = "brown", alpha=1,
position = position_dodge2(width = 0.1))+
labs(color="Organic Matter Source", shape="Zooplankton Posterior", fill="Zooplankton Samples")+
theme(axis.text.x = element_text(angle = 90, vjust = 0.3, hjust=1))
AAplots
We also want to plot comparisons of the trophic parameters used to simulate the zooplankton data and the model posteriors for those parameters.
posts.zoops$trophic$samples$model <- as.factor("vague prior")
posts.zoops$trophic$samples$Group <- as.factor(posts.zoops$trophic$samples$Group)
plot.FWL <-
ggplot()+
geom_violin(data = posts.zoops$trophic$samples,
aes(x=Group, y=FWL, fill=""),
alpha=0.4, color="steelblue4", width=1)+
scale_fill_manual(values=c("steelblue1","steelblue4"))+
geom_line(data = posts.zoops$trophic$HDI95,
aes(x = Group, y = FWL, group = Group),
alpha = 0.5, size = 1, color = "steelblue4")+
geom_line(data = posts.zoops$trophic$HDI90,
aes(x = Group, y = FWL, group = Group),
alpha = 0.7, size = 1.5, color = "steelblue4")+
geom_line(data = posts.zoops$trophic$HDI75,
aes(x = Group, y = FWL, group = Group),
alpha = 0.9, size = 2, color = "steelblue4")+
geom_line(data = posts.zoops$trophic$HDI50,
aes(x = Group, y = FWL, group = Group),
alpha = 1, size = 3, color = "steelblue4")+
scale_linetype_manual(values=c(1,1))+
geom_hline(
aes(yintercept=as.numeric(FWL), color=""),
lty=1, size=0.5, alpha=1)+
scale_color_manual(values=c("goldenrod"))+
labs(fill="Model Posterior", color="True Value",lty="95% HDI")+
xlab("Zooplankton Sample") + ylab("FWL")+
coord_cartesian(ylim = c(0,6))
plot.PTS <-
ggplot()+
geom_violin(data = posts.zoops$trophic$samples,
aes(x=Group, y=PTS, fill=""),
alpha=0.4, color="steelblue4", width=1)+
scale_fill_manual(values=c("steelblue1","steelblue4"))+
geom_line(data = posts.zoops$trophic$HDI95,
aes(x = Group, y = PTS, group = Group),
alpha = 0.5, size = 1, color = "steelblue4")+
geom_line(data = posts.zoops$trophic$HDI90,
aes(x = Group, y = PTS, group = Group),
alpha = 0.7, size = 1.5, color = "steelblue4")+
geom_line(data = posts.zoops$trophic$HDI75,
aes(x = Group, y = PTS, group = Group),
alpha = 0.9, size = 2, color = "steelblue4")+
geom_line(data = posts.zoops$trophic$HDI50,
aes(x = Group, y = PTS, group = Group),
alpha = 1, size = 3, color = "steelblue4")+
scale_linetype_manual(values=c(1,1))+
geom_hline(
aes(yintercept=as.numeric(PTS), color=""),
lty=1, size=0.5, alpha=1)+
scale_color_manual(values=c("goldenrod"))+
labs(fill="Model Posterior", color="True Value",lty="95% HDI")+
xlab("Zooplankton Sample") + ylab("PTS") + labs(lty="",shape="",color="")+
coord_cartesian(ylim = c(0,4))
plot.MTS <-
ggplot()+
geom_violin(data = posts.zoops$trophic$samples,
aes(x=Group, y=MTS, fill=""),
alpha=0.4, color="steelblue4", width=1)+
scale_fill_manual(values=c("steelblue1","steelblue4"))+
geom_line(data = posts.zoops$trophic$HDI95,
aes(x = Group, y = MTS, group = Group),
alpha = 0.5, size = 1, color = "steelblue4")+
geom_line(data = posts.zoops$trophic$HDI90,
aes(x = Group, y = MTS, group = Group),
alpha = 0.7, size = 1.5, color = "steelblue4")+
geom_line(data = posts.zoops$trophic$HDI75,
aes(x = Group, y = MTS, group = Group),
alpha = 0.9, size = 2, color = "steelblue4")+
geom_line(data = posts.zoops$trophic$HDI50,
aes(x = Group, y = MTS, group = Group),
alpha = 1, size = 3, color = "steelblue4")+
scale_linetype_manual(values=c(1,1))+
geom_hline(
aes(yintercept=as.numeric(MTS), color=""),
lty=1, size=0.5, alpha=1)+
scale_color_manual(values=c("goldenrod"))+
labs(fill="Model Posterior", color="True Value",lty="95% HDI")+
xlab("Zooplankton Sample") + ylab("MTS") + labs(lty="",shape="",color="")+
coord_cartesian(ylim = c(0,4))
ggarrange(
ggarrange(
plot.MTS + theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.y=element_blank()),
plot.PTS + theme(axis.title.x=element_blank()),
nrow=2, heights = c(1,1.1), legend = "none"),
plot.FWL + theme(axis.title.x=element_blank()),
ncol=2, widths = c(1,1.1), common.legend = TRUE , legend="right")
Last we’ll do the same for mixing parameters. Here we’ll generate simple box plots, but also ternary plots with two of the four organic matter sources pooled on one axis.
## Making ternary plots of mixing parameters
posts.zoops$f$samples$model <- "vague prior"
posts.zoops$f$samples$Group <- as.character(posts.zoops$f$samples$Group)
ggtern(data = posts.zoops$f$samples,
aes(x=Surface,
y=Large,
z=Small))+
# stat_confidence_tern(aes(fill=model), alpha=0.0, size=1,
# breaks=1, geom = "polygon")+
stat_confidence_tern(aes(fill=""), color="grey20", alpha=0.4, size=0,
breaks=c(0.95,0.9,0.75,0.5), geom = "polygon")+
scale_fill_manual(values=c("steelblue4","steelblue1"))+
geom_point(data = zoops.sim,
aes(x=Surface,
y=Large,
z=Small,
shape=""),
color="grey10",size=3, stroke=1, fill="goldenrod")+
scale_shape_manual(values=c(24))+
theme_bw()+
theme(legend.box="vertical",
legend.position = "right",
tern.axis.title.T=element_blank(),
tern.axis.title.L=element_blank(),
tern.axis.title.R=element_blank())+
theme_arrowlarge()+
scale_shape_manual(values = c(24))+
labs(fill="Model Posterior",shape="True Value",color="95% HDI")+
facet_wrap(~ Group, nrow = 3)
Warning in geom_point(data = zoops.sim, aes(x = Surface, y = Large, z = Small,
: Ignoring unknown aesthetics: z
Scale for shape is already present.
Adding another scale for shape, which will replace the existing scale.
Warning: Combining variables of class <character> and <integer> was deprecated in
ggplot2 3.4.0.
ℹ Please ensure your variables are compatible before plotting (location:
`combine_vars()`)
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
generated.
Warning: Combining variables of class <integer> and <character> was deprecated in
ggplot2 3.4.0.
ℹ Please ensure your variables are compatible before plotting (location:
`combine_vars()`)
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
generated.
theme_set(theme_classic2()+
theme(panel.grid.major.x = element_line(colour = "grey95"),
panel.grid.major.y = element_line(colour = "grey95")))
nzoops=zoops.n
plot.Surface <-
ggplot()+
geom_violin(data = posts.zoops$f$samples,
aes(x=as.integer(Group), y=Surface, group = Group, fill=""),
alpha=0.4, color="lightskyblue1", width=1)+
scale_fill_manual(values=c("steelblue1","steelblue4"))+
geom_line(data = posts.zoops$f$HDI95,
aes(x = Group, y = Surface, group = Group),
alpha = 0.5, size = 1, color = "steelblue4")+
geom_line(data = posts.zoops$f$HDI90,
aes(x = Group, y = Surface, group = Group),
alpha = 0.7, size = 1.5, color = "steelblue4")+
geom_line(data = posts.zoops$f$HDI75,
aes(x = Group, y = Surface, group = Group),
alpha = 0.9, size = 2, color = "steelblue4")+
geom_line(data = posts.zoops$f$HDI50,
aes(x = Group, y = Surface, group = Group),
alpha = 1, size = 3, color = "steelblue4")+
geom_segment(
aes(y=zoops.f$Surface, x=seq(1,nzoops)-0.5, xend=seq(1,nzoops)+0.5,
color=""),
lty=1,size=0.5,alpha=1)+
scale_color_manual(values=c("goldenrod"))+
labs(fill="Model Posterior", color="True Value", lty="95% HDI")+
xlab("Zooplankton Sample") + ylab("surface") +
scale_x_continuous(breaks = seq(1,N_Z)) +
coord_cartesian(xlim = c(1,N_Z), ylim = c(0,1))
plot.Large <-
ggplot()+
geom_violin(data = posts.zoops$f$samples,
aes(x=as.integer(Group), y=Large, group = Group, fill=""),
alpha=0.4, color="lightskyblue1", width=1)+
scale_fill_manual(values=c("steelblue1","steelblue4"))+
geom_line(data = posts.zoops$f$HDI95,
aes(x = Group, y = Large, group = Group),
alpha = 0.5, size = 1, color = "steelblue4")+
geom_line(data = posts.zoops$f$HDI90,
aes(x = Group, y = Large, group = Group),
alpha = 0.7, size = 1.5, color = "steelblue4")+
geom_line(data = posts.zoops$f$HDI75,
aes(x = Group, y = Large, group = Group),
alpha = 0.9, size = 2, color = "steelblue4")+
geom_line(data = posts.zoops$f$HDI50,
aes(x = Group, y = Large, group = Group),
alpha = 1, size = 3, color = "steelblue4")+
geom_segment(
aes(y=zoops.f$Large, x=seq(1,nzoops)-0.5, xend=seq(1,nzoops)+0.5,
color=""),
lty=1,size=0.5,alpha=1)+
scale_color_manual(values=c("goldenrod"))+
labs(fill="Model", color="Actual")+
xlab("Zooplankton Sample") + ylab("large") +
scale_x_continuous(breaks = seq(1,N_Z)) +
coord_cartesian(xlim = c(1,N_Z), ylim = c(0,1))
plot.Small <-
ggplot()+
geom_violin(data = posts.zoops$f$samples,
aes(x=as.integer(Group), y=Small, group = Group, fill=""),
alpha=0.4, color="lightskyblue1", width=1)+
scale_fill_manual(values=c("steelblue1","steelblue4"))+
geom_line(data = posts.zoops$f$HDI95,
aes(x = Group, y = Small, group = Group),
alpha = 0.5, size = 1, color = "steelblue4")+
geom_line(data = posts.zoops$f$HDI90,
aes(x = Group, y = Small, group = Group),
alpha = 0.7, size = 1.5, color = "steelblue4")+
geom_line(data = posts.zoops$f$HDI75,
aes(x = Group, y = Small, group = Group),
alpha = 0.9, size = 2, color = "steelblue4")+
geom_line(data = posts.zoops$f$HDI50,
aes(x = Group, y = Small, group = Group),
alpha = 1, size = 3, color = "steelblue4")+
geom_segment(
aes(y=zoops.f$Small, x=seq(1,nzoops)-0.5, xend=seq(1,nzoops)+0.5,
color=""),
lty=1,size=0.5,alpha=1)+
scale_color_manual(values=c("goldenrod"))+
labs(fill="Model", color="Actual")+
xlab("Zooplankton Sample") + ylab("small") +
scale_x_continuous(breaks = seq(1,N_Z)) +
coord_cartesian(xlim = c(1,N_Z), ylim = c(0,1))
ggarrange(
plot.Surface + no.x.axis,
plot.Large + no.x.axis,
plot.Small + theme(axis.title.x = element_blank()),
nrow = 3, ncol = 1, heights = c(1,1.1), legend = "none", common.legend = TRUE)
The last things we want to do here is export out mixing model results to a viewable table and a .xlsx file.
save(posts.global, posts.global.long,
posts.zoops, posts.zoops.long,
Data.all, zoops.sim, zoops.f,
file = "OSPsim_OMSM1_posteriors.RData")